import random
import math
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
import numpy as np
import pandas as pd
import sklearn
import seaborn as sns
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
# --- Data preparation -------------------------------------------------------
# Load the HDFC Bank price history, build a scaled feature matrix and the
# 'Close' target, then hold out 20% of the rows as a test set.
import sklearn
from sklearn.preprocessing import MinMaxScaler

all_data = pd.read_csv("HDFCBANK.csv")
all_data.head(20)

# Features: drop identifier columns and the target itself.
x = all_data.drop(
    ['Date', 'Trades', 'Deliverable Volume', '%Deliverble', 'Close', 'Symbol', 'Series'],
    axis=1,
)
x.head()

# Target: the closing price to be predicted.
y = all_data['Close']
y.head()

# Rescale every feature into [0, 1] so no column dominates by raw magnitude.
scld = MinMaxScaler(feature_range=(0, 1))
arr_scld = scld.fit_transform(x)
x_scld = pd.DataFrame(arr_scld, columns=x.columns)
x_scld.head()
x_scld.describe()

# Fixed random_state keeps the split reproducible across runs.
train_x, test_x, train_y, test_y = train_test_split(x_scld, y, test_size=0.20, random_state=1)
train_x.shape
test_x.shape
train_y.shape
test_y.shape
# Baseline model: a CART regression tree, grown without depth limits.
# random_state pins the tie-breaking so results are reproducible.
from sklearn.tree import DecisionTreeRegressor

model = DecisionTreeRegressor(random_state=44)
model.fit(train_x, train_y)

# Predict closing prices for the held-out rows.
predicted_values = model.predict(test_x)
predicted_values
# Visualize the fitted decision tree: export Graphviz DOT text, then render it.
# Fix: `tree` was referenced here before `from sklearn import tree` ever ran
# (the import only appeared further down the notebook), raising a NameError,
# and the `with` body was unindented with a redundant dotfile.close().
from sklearn import tree
import graphviz
from graphviz import Source

with open('tree.dot', 'w') as dotfile:
    # filled=True colors nodes by predicted value, making splits easier to read;
    # the `with` block closes the file automatically on exit.
    tree.export_graphviz(model, out_file=dotfile, feature_names=train_x.columns, filled=True)

with open('tree.dot', 'r') as f:
    text = f.read()
plot = Source(text)
plot
# STEP 6 - EVALUATE MODEL PERFORMANCE for DecisionTreeRegressor
# (section headers were bare markdown text — syntax errors in a script —
# and are now comments)

# MEAN ABSOLUTE ERROR (MAE) FOR TEST DATA
from sklearn.metrics import mean_absolute_error
print("MEAN ABSOLUTE ERROR (MAE) FOR TEST DATA IS")
np.round(mean_absolute_error(test_y, predicted_values), 3)

# MEAN SQUARE ERROR (MSE) FOR TEST DATA
from sklearn.metrics import mean_squared_error
print("MEAN SQUARE ERROR (MSE) FOR TEST DATA IS")
mse = np.round(mean_squared_error(test_y, predicted_values), 3)
mse

# ROOT MEAN SQUARE ERROR (RMSE) FOR TEST DATA
import math
math.sqrt(mse)

# MEDIAN ABSOLUTE ERROR FOR TEST DATA
from sklearn.metrics import median_absolute_error
print("MEDIAN ABSOLUTE ERROR (MAE) FOR TEST DATA IS")
np.round(median_absolute_error(test_y, predicted_values), 3)

# R-squared. Fix: r2_score is NOT symmetric in its arguments — the original
# passed predictions as y_true, which reports a wrong R². y_true comes first.
from sklearn.metrics import r2_score
print("R-square FOR TEST DATA IS")
np.round(r2_score(test_y, predicted_values), 3)
# Re-split with the same seed: this reproduces the exact same train/test
# partition as before, so the next models are scored on identical rows.
train_x, test_x, train_y, test_y = train_test_split(
    x_scld, y, test_size=0.20, random_state=1
)
train_x.shape
test_x.shape
train_y.shape
test_y.shape
# Tune the tree with an exhaustive grid search over depth and split criterion,
# scoring every combination with 3-fold cross-validation on the train set.
from sklearn import tree
import pydotplus
from sklearn.model_selection import GridSearchCV

my_max_depth = [2, 5, 10, 15, 20, 25]
my_criterion = ["squared_error", "friedman_mse", "absolute_error", "poisson"]

# Base estimator stays at defaults; the grid supplies the hyper-parameters.
mytreeone = DecisionTreeRegressor()

# Cross-validation estimates how each candidate generalises across
# multiple samples of the training data.
search_space = dict(max_depth=my_max_depth, criterion=my_criterion)
model = GridSearchCV(estimator=mytreeone, cv=3, param_grid=search_space)
model.fit(train_x, train_y)

# Predict with the best estimator found by the search.
predicted_values = model.predict(test_x)
predicted_values
# STEP 6 - EVALUATE MODEL PERFORMANCE for GridSearchCV
# (section headers were bare markdown text — syntax errors in a script —
# and are now comments)

# MEAN ABSOLUTE ERROR (MAE) FOR TEST DATA
from sklearn.metrics import mean_absolute_error
print("MEAN ABSOLUTE ERROR (MAE) FOR TEST DATA IS")
np.round(mean_absolute_error(test_y, predicted_values), 3)

# MEAN SQUARE ERROR (MSE) FOR TEST DATA
from sklearn.metrics import mean_squared_error
print("MEAN SQUARE ERROR (MSE) FOR TEST DATA IS")
mse = np.round(mean_squared_error(test_y, predicted_values), 3)
mse

# ROOT MEAN SQUARE ERROR (RMSE) FOR TEST DATA
import math
math.sqrt(mse)

# MEDIAN ABSOLUTE ERROR FOR TEST DATA
from sklearn.metrics import median_absolute_error
print("MEDIAN ABSOLUTE ERROR (MAE) FOR TEST DATA IS")
np.round(median_absolute_error(test_y, predicted_values), 3)

# R-squared. Fix: r2_score is NOT symmetric in its arguments — the original
# passed predictions as y_true, which reports a wrong R². y_true comes first.
from sklearn.metrics import r2_score
print("R-square FOR TEST DATA IS")
np.round(r2_score(test_y, predicted_values), 3)
# Random-forest tuning via randomized search: samples hyper-parameter
# combinations from the lists below instead of trying every one.
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV

n_estimators = [10, 500, 100, 2000]
criterion = ["squared_error", "absolute_error", "poisson"]
# Fix: the original list repeated 100 ([2,100,20,50,100]), which silently
# doubled its sampling weight in the randomized search.
min_samples_split = [2, 100, 20, 50]
max_depth = [20, 30, 40, 50]

param_grid = {
    'n_estimators': n_estimators,
    'criterion': criterion,
    'min_samples_split': min_samples_split,
    'max_depth': max_depth,
}
rf = RandomForestRegressor()
# verbose=2 logs each candidate fit; n_jobs=1 keeps fitting single-process.
model = RandomizedSearchCV(rf, param_grid, cv=5, verbose=2, n_jobs=1)
model.fit(train_x, train_y)
predicted_values = model.predict(test_x)
predicted_values
# STEP 6 - EVALUATE MODEL PERFORMANCE for RandomizedSearchCV
# (section headers were bare markdown text — syntax errors in a script —
# and are now comments)

# MEAN ABSOLUTE ERROR (MAE) FOR TEST DATA
from sklearn.metrics import mean_absolute_error
print("MEAN ABSOLUTE ERROR (MAE) FOR TEST DATA IS")
np.round(mean_absolute_error(test_y, predicted_values), 3)

# MEAN SQUARE ERROR (MSE) FOR TEST DATA
from sklearn.metrics import mean_squared_error
print("MEAN SQUARE ERROR (MSE) FOR TEST DATA IS")
mse = np.round(mean_squared_error(test_y, predicted_values), 3)
mse

# ROOT MEAN SQUARE ERROR (RMSE) FOR TEST DATA
import math
math.sqrt(mse)

# MEDIAN ABSOLUTE ERROR FOR TEST DATA
from sklearn.metrics import median_absolute_error
print("MEDIAN ABSOLUTE ERROR (MAE) FOR TEST DATA IS")
np.round(median_absolute_error(test_y, predicted_values), 3)

# R-squared. Fix: r2_score is NOT symmetric in its arguments — the original
# passed predictions as y_true, which reports a wrong R². y_true comes first.
from sklearn.metrics import r2_score
print("R-square FOR TEST DATA IS")
np.round(r2_score(test_y, predicted_values), 3)
# Gradient-boosted regression trees via XGBoost's sklearn wrapper.
import xgboost as xg

# Fix: 'reg:linear' was deprecated and then removed by XGBoost;
# 'reg:squarederror' is the identically-behaving replacement name.
# NOTE(review): both `seed` and `random_state` are set — they are aliases in
# the sklearn wrapper, so one is redundant; confirm which seed was intended.
model = xg.XGBRegressor(
    objective='reg:squarederror',
    seed=123,
    booster='gbtree',
    max_depth=5,
    n_estimators=200,
    random_state=42,
)
model.fit(train_x, train_y)
predicted_values = model.predict(test_x)
predicted_values
# STEP 6 - EVALUATE MODEL PERFORMANCE for xgboost
# (section headers were bare markdown text — syntax errors in a script —
# and are now comments)

# MEAN ABSOLUTE ERROR (MAE) FOR TEST DATA
from sklearn.metrics import mean_absolute_error
print("MEAN ABSOLUTE ERROR (MAE) FOR TEST DATA IS")
np.round(mean_absolute_error(test_y, predicted_values), 3)

# MEAN SQUARE ERROR (MSE) FOR TEST DATA
from sklearn.metrics import mean_squared_error
print("MEAN SQUARE ERROR (MSE) FOR TEST DATA IS")
mse = np.round(mean_squared_error(test_y, predicted_values), 3)
mse

# ROOT MEAN SQUARE ERROR (RMSE) FOR TEST DATA
import math
math.sqrt(mse)

# MEDIAN ABSOLUTE ERROR FOR TEST DATA
from sklearn.metrics import median_absolute_error
print("MEDIAN ABSOLUTE ERROR (MAE) FOR TEST DATA IS")
np.round(median_absolute_error(test_y, predicted_values), 3)

# R-squared. Fix: r2_score is NOT symmetric in its arguments — the original
# passed predictions as y_true, which reports a wrong R². y_true comes first.
from sklearn.metrics import r2_score
print("R-square FOR TEST DATA IS")
np.round(r2_score(test_y, predicted_values), 3)
!jupyter nbconvert tockMarketPrediction_DT_GRIDSEARCHCV_RF_XGBOOST.ipynb